Subset and recluster
Subset
# subset
tcells <- subset(tcells.unanotated,
merged_clusters == "T cells & ILCs")
# plot
tcells_colors <- c("azure3")
DimPlot(object = tcells,
reduction = "umap",
cols = tcells_colors)

# create new object
meta <- tcells@meta.data
meta <- meta[,c(4:13,25,26)]
colnames(meta)[c(11,12)] <- c("original_individual_clusters","original_merged_clusters")
counts <- GetAssayData(object = tcells, slot = "counts")
all.equal(rownames(meta),colnames(counts))
## [1] TRUE
tcells <- CreateSeuratObject(counts,
meta.data = meta)
Normalize and integrate
- Apply sctransform normalization
- Note that this single command replaces NormalizeData(), ScaleData(), and FindVariableFeatures()
# Split object by sample
Idents(tcells) <- tcells$sample
tcells.split <- SplitObject(tcells, split.by = "sample")
# SCTransform - normalize, scale, find variable features
tcells.split <- lapply(tcells.split, SCTransform)
# Find variable features of split object
var.features <- SelectIntegrationFeatures(tcells.split)
# Merge the samples
tcells.sct.merged <- merge(x = tcells.split[[1]],
y = c(tcells.split[[2]], tcells.split[[3]],
tcells.split[[4]], tcells.split[[5]],
tcells.split[[6]], tcells.split[[7]],
tcells.split[[8]]))
# Set variables features for merged object
VariableFeatures(tcells.sct.merged) <- var.features
# Run PCA on the merged object
tcells.sct.merged <- RunPCA(object = tcells.sct.merged, assay = "SCT")
# Harmony dimensional reduction
tcells.integrated <- RunHarmony(object = tcells.sct.merged,
group.by.vars = "sample",
assay.use = "SCT",
reduction = "pca")
# Cleanup
remove(tcells.unanotated,counts,tcells,tcells.split,tcells.sct.merged)
UMAP and clustering
# Find significant PCs
stdv <- tcells.integrated[["pca"]]@stdev
sum.stdv <- sum(tcells.integrated[["pca"]]@stdev)
percent.stdv <- (stdv / sum.stdv) * 100
cumulative <- cumsum(percent.stdv)
co1 <- which(cumulative > 90 & percent.stdv < 5)[1]
co2 <- sort(which(
(percent.stdv[1:length(percent.stdv) - 1] -
percent.stdv[2:length(percent.stdv)]) > 0.1),
decreasing = T)[1] + 1
min.pc <- min(co1, co2)
min.pc
# Run UMAP
tcells.integrated <- RunUMAP(tcells.integrated,
dims = 1:min.pc,
reduction = "harmony",
n.components = 3) # set to 3 to use with VR
# Determine the K-nearest neighbor graph
tcells.unannotated <- FindNeighbors(object = tcells.integrated,
assay = "SCT",
reduction = "harmony",
dims = 1:min.pc)
# Determine the clusters for various resolutions
tcells.unannotated <- FindClusters(object = tcells.unannotated,
algorithm = 1, # 1 = Louvain
resolution = seq(0.1, 0.5,by = 0.1))
tcells.unannotated$seurat_clusters <- tcells.unannotated$SCT_snn_res.0.2
Idents(tcells.unannotated) <- "seurat_clusters"
# Reset
DefaultAssay(tcells.unannotated) <- "RNA"
tcells.unannotated <- NormalizeData(tcells.unannotated)
tcells.unannotated
# save
saveRDS(tcells.unannotated, "../../rObjects/mouse_tcells_unannotated.rds")
Explore resolutions
# 0.1
DimPlot(tcells.unannotated,
group.by = "SCT_snn_res.0.1",
label = TRUE)

# 0.2
DimPlot(tcells.unannotated,
group.by = "SCT_snn_res.0.2",
label = TRUE)

# 0.3
DimPlot(tcells.unannotated,
group.by = "SCT_snn_res.0.3",
label = TRUE)

# 0.4
DimPlot(tcells.unannotated,
group.by = "SCT_snn_res.0.4",
label = TRUE)

# 0.5
DimPlot(tcells.unannotated,
group.by = "SCT_snn_res.0.5",
label = TRUE)

2D Unannotated UMAP
# Resolution of 0.1, new clusters
cluster_colors <- c("firebrick1","gold","green", "darkgreen","cyan","blue",
"plum1","azure3")
DimPlot(tcells.unannotated,
cols = cluster_colors)

3D Unannotated UMAP
embeddings <- tcells.unannotated@reductions$umap@cell.embeddings
embeddings <- cbind(embeddings, as.character(tcells.unannotated$seurat_clusters))
colnames(embeddings)[4] <- "seurat_clusters"
embeddings <- as.data.frame(embeddings)
three.dim <- plot_ly(embeddings,
x = ~UMAP_1,
y = ~UMAP_2,
z = ~UMAP_3,
color = ~seurat_clusters,
colors = cluster_colors,
size = 1)
three.dim <- three.dim %>% add_markers()
three.dim <- three.dim %>% layout(scene = list(xaxis = list(title = 'UMAP_1'),
yaxis = list(title = 'UMAP_2'),
zaxis = list(title = 'UMAP_3')))
three.dim
Potential markers
Sandro’s markers
goi1 <- c("Pdgfrb","Rgs5","Pecam1","Vwf","Cldn5","Kdr","Prox1","Flt4","Ptprc",
"Itgam","Aif1","Mrc1","H2-Eb1","Itgax","Ccr2","Ly6c2","Ly6g","Kit",
"Mki67","Col1a2","Plp1","Trbc2","Gata3","Il7r","Cd19","Ms4a1")
v1 <- VlnPlot(tcells.unannotated,
features = goi1,
split.by = "seurat_clusters",
cols = cluster_colors,
flip = TRUE,
stack = TRUE) + theme(legend.position = "none")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##
## This message will be shown once per session.
v1

Automatically detect markers
# work in parallel
options(mc.cores = detectCores() - 1)
# Find markers for each cluster
# Compares single cluster vs all other clusters
# Default is logfc.threshold = 0.25, min.pct = 0.5
Idents(tcells.unannotated) <- "seurat_clusters"
all.markers <- FindAllMarkers(object = tcells.unannotated,
assay = "RNA",
test.use = "MAST",
verbose = TRUE)
# add column
all.markers$delta_pct <- abs(all.markers$pct.1 - all.markers$pct.2)
# rename columns and rows
rownames(all.markers) <- 1:nrow(all.markers)
all.markers <- all.markers[,c(6,7,1,5,2:4,8)]
colnames(all.markers)[c(6,7)] <- c("pct_1","pct_2")
# save
saveRDS(all.markers, "../../rObjects/mouse_tcells_markers.rds")
# more stringent filtering
all.markers <- all.markers[all.markers$p_val_adj < 0.01,]
# compare
table(all.markers$cluster)
##
## 0 1 2 3 4 5 6 7
## 3011 3363 2540 1424 756 2618 731 814
# subset
cluster0 <- all.markers[all.markers$cluster == 0,]
cluster1 <- all.markers[all.markers$cluster == 1,]
cluster2 <- all.markers[all.markers$cluster == 2,]
cluster3 <- all.markers[all.markers$cluster == 3,]
cluster4 <- all.markers[all.markers$cluster == 4,]
cluster5 <- all.markers[all.markers$cluster == 5,]
cluster6 <- all.markers[all.markers$cluster == 6,]
cluster7 <- all.markers[all.markers$cluster == 7,]
# save
write.table(all.markers,
"../../results/reclusterTcell/DEGs/putative_cluster_markers_pvaladj_0.01.tsv",
sep = "\t", quote = FALSE, row.names = FALSE)
Cluster Annotations
Cluster 0: ?
# Top 20 markers based on p_val_adj and then avg_log2FC
VlnPlot(tcells.unannotated,
features = cluster0$gene[1:10],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")

VlnPlot(tcells.unannotated,
features = cluster0$gene[11:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")

Cluster 1: ?
# Top 20 markers based on p_val_adj and then avg_log2FC
VlnPlot(tcells.unannotated,
features = cluster1$gene[1:10],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")

VlnPlot(tcells.unannotated,
features = cluster1$gene[11:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")

Cluster 2: ?
# Top 20 markers based on p_val_adj and then avg_log2FC
VlnPlot(tcells.unannotated,
features = cluster2$gene[1:10],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")

VlnPlot(tcells.unannotated,
features = cluster2$gene[11:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")

Cluster 3: ?
# Top 20 markers based on p_val_adj and then avg_log2FC
VlnPlot(tcells.unannotated,
features = cluster3$gene[1:10],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")

VlnPlot(tcells.unannotated,
features = cluster3$gene[11:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")

Cluster 4: ?
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(tcells.unannotated,
features = cluster4$gene[1:10],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")

VlnPlot(tcells.unannotated,
features = cluster4$gene[11:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")

Cluster 5: ?
Cluster 6: ?
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(tcells.unannotated,
features = cluster6$gene[1:10],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")

VlnPlot(tcells.unannotated,
features = cluster6$gene[11:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")

Cluster 7: ?
# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(tcells.unannotated,
features = cluster7$gene[1:10],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")

VlnPlot(tcells.unannotated,
features = cluster7$gene[11:20],
cols = cluster_colors,
stack = TRUE,
flip = TRUE,
split.by = "seurat_clusters")

Individual clusters
# Rename all identities
#tcells.annotated <- RenameIdents(object = tcells.unannotated,
# "0" = "Kdr-high Rgcc+ BECs",
# "1" = "Kdr-high BECs",
# "2" = "Vwf+ BECs",
# "3" = "Vwf+ Clnd5+ BECs",
# "4" = "Kdr-high Cldn5+ BECs",
# "5" = "Doublets BECs/tcellsphages",
# "6" = "LECs")
#tcells.annotated$seurat_clusters <- Idents(tcells.annotated)
#tcells.annotated$individual_clusters <- Idents(tcells.annotated)
Clustering QC after annotation
3D UMAP
embeddings <- tcells.unannotated@reductions$umap@cell.embeddings
embeddings <- cbind(embeddings, as.character(tcells.unannotated$seurat_clusters))
colnames(embeddings)[4] <- "seurat_clusters"
embeddings <- as.data.frame(embeddings)
three.dim <- plot_ly(embeddings,
x = ~UMAP_1,
y = ~UMAP_2,
z = ~UMAP_3,
color = ~seurat_clusters,
colors = cluster_colors,
size = 1)
three.dim <- three.dim %>% add_markers()
three.dim <- three.dim %>% layout(scene = list(xaxis = list(title = '1'),
yaxis = list(title = '2'),
zaxis = list(title = '3')))
three.dim
Isoform, sex, age
# Apoe isoform
umap1 <- DimPlot(object = tcells.unannotated,
group.by = "seurat_clusters",
reduction = "umap",
split.by = "isoform",
repel = TRUE,
cols = cluster_colors)
umap1

# age
umap2 <- DimPlot(object = tcells.unannotated,
group.by = "seurat_clusters",
reduction = "umap",
split.by = "age",
repel = TRUE,
cols = cluster_colors)
umap2

# sex
umap3 <- DimPlot(object = tcells.unannotated,
group.by = "seurat_clusters",
reduction = "umap",
split.by = "sex",
repel = TRUE,
cols = cluster_colors)
umap3

# sample
umap4 <- DimPlot(object = tcells.unannotated,
group.by = "seurat_clusters",
reduction = "umap",
split.by = "sample",
ncol = 2,
repel = TRUE,
cols = cluster_colors)
umap4

# phase
umap5 <- DimPlot(object = tcells.unannotated,
group.by = "seurat_clusters",
reduction = "umap",
split.by = "Phase",
cols = cluster_colors,
repel = TRUE)
umap5

# mito.factor
umap6 <- DimPlot(object = tcells.unannotated,
group.by = "seurat_clusters",
reduction = "umap",
split.by = "mito.factor",
cols = cluster_colors,
ncol = 2,
repel = TRUE)
umap6

Feature plots
# UMAP percent.mt
f1 <- FeaturePlot(tcells.unannotated,
reduction = "umap",
features = "percent.mt") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f1

# UMAP nCount
f2 <- FeaturePlot(tcells.unannotated,
reduction = "umap",
features = "nCount_RNA") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f2

# UMAP nFeature
f3 <- FeaturePlot(tcells.unannotated,
reduction = "umap",
features = "nFeature_RNA") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f3

# UMAP percent.ribo
f4 <- FeaturePlot(tcells.unannotated,
reduction = "umap",
features = "percent.ribo.protein") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f4

Ttr
VlnPlot(tcells.unannotated,
features = "Ttr",
split.by = "sample",
group.by = "sample",
cols = sample_colors)

VlnPlot(tcells.unannotated,
features = "Ttr",
split.by = "seurat_clusters",
cols = cluster_colors)

FeaturePlot(object = tcells.unannotated,
features = "Ttr")

Percent cells per cluster
# isoform
b1 <- tcells.unannotated@meta.data %>%
group_by(seurat_clusters, isoform) %>%
dplyr::count() %>%
group_by(seurat_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=seurat_clusters,y=percent, fill=isoform)) +
theme_classic() +
geom_col() +
scale_fill_manual(values = isoform_colors) +
ggtitle("Percentage of isoform per cluster") +
theme(axis.text.x = element_text(angle = 90))
b1

# sex
b2 <- tcells.unannotated@meta.data %>%
group_by(seurat_clusters, sex) %>%
dplyr::count() %>%
group_by(seurat_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=seurat_clusters,y=percent, fill=sex)) +
theme_classic() +
geom_col() +
scale_fill_manual(values = sex_colors) +
ggtitle("Percentage of sex per cluster") +
theme(axis.text.x = element_text(angle = 90))
b2

# age
b3 <- tcells.unannotated@meta.data %>%
group_by(seurat_clusters, age) %>%
dplyr::count() %>%
group_by(seurat_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=seurat_clusters,y=percent, fill=age)) +
theme_classic() +
geom_col() +
scale_fill_manual(values = age_colors) +
ggtitle("Percentage of age per cluster") +
theme(axis.text.x = element_text(angle = 90))
b3

# sample
b4 <- tcells.unannotated@meta.data %>%
group_by(seurat_clusters, sample) %>%
dplyr::count() %>%
group_by(seurat_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=seurat_clusters,y=percent, fill=sample)) +
theme_classic() +
geom_col() +
scale_fill_manual(values = sample_colors) +
ggtitle("Percentage of age per cluster") +
theme(axis.text.x = element_text(angle = 90))
# phase
b5 <- tcells.unannotated@meta.data %>%
group_by(seurat_clusters, Phase) %>%
dplyr::count() %>%
group_by(seurat_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=seurat_clusters,y=percent, fill=Phase)) +
theme_classic() +
geom_col() +
ggtitle("Percentage of age per cluster") +
theme(axis.text.x = element_text(angle = 90))
b5

# mito.factor
b6 <- tcells.unannotated@meta.data %>%
group_by(seurat_clusters, mito.factor) %>%
dplyr::count() %>%
group_by(seurat_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=seurat_clusters,y=percent, fill=mito.factor)) +
theme_classic() +
geom_col() +
ggtitle("Percentage of age per cluster") +
theme(axis.text.x = element_text(angle = 90))
b6

Number cells per cluster
# clusters
cluster_ncells <- FetchData(tcells.unannotated,
vars = c("seurat_clusters")) %>%
dplyr::count(seurat_clusters)
cluster_ncells
## seurat_clusters n
## 1 0 930
## 2 1 505
## 3 2 405
## 4 3 319
## 5 4 202
## 6 5 163
## 7 6 118
## 8 7 82
write.table(cluster_ncells,
"../../results/reclusterTcell/numCells/cells_per_cluster_unannotated.tsv",
quote = FALSE, sep = "\t", row.names = FALSE)
# sample
sample_ncells <- FetchData(tcells.unannotated,
vars = c("ident", "sample")) %>%
dplyr::count(ident, sample) %>%
tidyr::spread(ident, n)
sample_ncells
## sample 0 1 2 3 4 5 6 7
## 1 E3.14M.F 100 81 25 24 20 14 8 6
## 2 E3.14M.M 135 43 60 51 32 22 14 10
## 3 E3.2M.F 51 42 71 26 24 34 31 12
## 4 E3.2M.M 115 67 47 32 28 23 17 15
## 5 E4.14M.F 232 139 101 81 40 28 17 10
## 6 E4.14M.M 156 65 64 56 30 22 21 10
## 7 E4.2M.F 72 38 17 26 8 12 7 14
## 8 E4.2M.M 69 30 20 23 20 8 3 5
write.table(sample_ncells,
"../../results/reclusterTcell/numCells/cells_per_cluster_per_sample_unannotated.tsv",
quote = FALSE, sep = "\t", row.names = FALSE)
# age
age_ncells <- FetchData(tcells.unannotated,
vars = c("ident", "age")) %>%
dplyr::count(ident, age) %>%
tidyr::spread(ident, n)
age_ncells
## age 0 1 2 3 4 5 6 7
## 1 14 months 623 328 250 212 122 86 60 36
## 2 2 months 307 177 155 107 80 77 58 46
write.table(age_ncells,
"../../results/reclusterTcell/numCells/cells_per_cluster_per_age_unannotated.tsv",
quote = FALSE, sep = "\t", row.names = FALSE)
# sex
sex_ncells <- FetchData(tcells.unannotated,
vars = c("ident", "sex")) %>%
dplyr::count(ident, sex) %>%
tidyr::spread(ident, n)
sex_ncells
## sex 0 1 2 3 4 5 6 7
## 1 Female 455 300 214 157 92 88 63 42
## 2 Male 475 205 191 162 110 75 55 40
write.table(sex_ncells,
"../../results/reclusterTcell/numCells/cells_per_cluster_per_sex_unannoated.tsv",
quote = FALSE, sep = "\t", row.names = FALSE)
# isoform
isoform_ncells <- FetchData(tcells.unannotated,
vars = c("ident", "isoform")) %>%
dplyr::count(ident, isoform) %>%
tidyr::spread(ident, n)
isoform_ncells
## isoform 0 1 2 3 4 5 6 7
## 1 E3 401 233 203 133 104 93 70 43
## 2 E4 529 272 202 186 98 70 48 39
write.table(isoform_ncells,
"../../results/reclusterTcell/numCells/cells_per_cluster_per_isoform_unannotated.tsv",
quote = FALSE, sep = "\t", row.names = FALSE)
# phase
phase_ncells <- FetchData(tcells.unannotated,
vars = c("ident", "Phase")) %>%
dplyr::count(ident, Phase) %>%
tidyr::spread(ident, n)
phase_ncells
## Phase 0 1 2 3 4 5 6 7
## 1 G1 252 136 86 23 78 40 12 8
## 2 G2M 241 175 159 133 57 65 43 23
## 3 S 437 194 160 163 67 58 63 51
write.table(phase_ncells,
"../../results/reclusterTcell/numCells/cells_per_cluster_per_phase_unannotated.tsv",
quote = FALSE, sep = "\t", row.names = FALSE)
Cluster tree
Idents(tcells.unannotated) <- tcells.unannotated$seurat_clusters
tcells.unannotated <- BuildClusterTree(object = tcells.unannotated,
dims = 1:15,
reorder = FALSE,
reorder.numeric = FALSE)
tree <- tcells.unannotated@tools$BuildClusterTree
tree$tip.label <- paste0(tree$tip.label)
p <- ggtree::ggtree(tree, aes(x, y)) +
scale_y_reverse() +
ggtree::geom_tree() +
ggtree::theme_tree() +
ggtree::geom_tiplab(offset = 1) +
ggtree::geom_tippoint(color = cluster_colors[1:length(tree$tip.label)], shape = 16, size = 5) +
coord_cartesian(clip = 'off') +
theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))
p

Stacked violins
goi1 <- c("Pdgfrb","Rgs5","Pecam1","Vwf","Cldn5","Kdr","Prox1","Flt4","Ptprc",
"Itgam","Aif1","Mrc1","H2-Eb1","Itgax","Ccr2","Ly6c2","Ly6g","Kit",
"Mki67","Col1a2","Plp1","Trbc2","Gata3","Il7r","Cd19","Ms4a1")
goi2 <- c("Kdr","Flt4","Prox1","Lyve1","Nrp2","Itga9","Itgb1","Grb2","Tnc","Plcg1","Crk")
goi3 <- c("Vegfc","Vegfd","Nrp2","Itga9","Itgb1","Grb2","Vcam1","Fn1","Crk","Sema3c","Sema3f")
Idents(tcells.unannotated) <- "seurat_clusters"
v1 <- VlnPlot(tcells.unannotated,
features = goi1,
split.by = 'seurat_clusters',
cols = cluster_colors,
flip = TRUE,
stack = TRUE) +
theme(legend.position = "none", axis.text.x = element_text(angle = 90))
v1
v2 <- VlnPlot(tcells.unannotated,
features = goi2,
cols = isoform_colors,
split.by = "isoform",
group.by = "seurat_clusters",
flip = TRUE,
stack = TRUE)
v2
v3 <- VlnPlot(tcells.unannotated,
features = goi2,
cols = sex_colors,
split.by = "sex",
group.by = "seurat_clusters",
flip = TRUE,
stack = TRUE)
v3
v4 <- VlnPlot(tcells.unannotated,
features = goi2,
cols = age_colors,
split.by = "age",
group.by = "seurat_clusters",
flip = TRUE,
stack = TRUE)
v4
v5 <- VlnPlot(tcells.unannotated,
features = goi3,
cols = isoform_colors,
split.by = "isoform",
group.by = "seurat_clusters",
flip = TRUE,
stack = TRUE)
v5
v6 <- VlnPlot(tcells.unannotated,
features = goi3,
cols = sex_colors,
split.by = "sex",
group.by = "seurat_clusters",
flip = TRUE,
stack = TRUE)
v6
v7 <- VlnPlot(tcells.unannotated,
features = goi3,
cols = age_colors,
split.by = "age",
group.by = "seurat_clusters",
flip = TRUE,
stack = TRUE)
v7
Differential expression
E4 vs E3 within each cluster
# intitialize variables
cell_types <- unique(tcells.unannotated$seurat_clusters)
isoform.df <- data.frame()
# loop through clusters
for (i in cell_types) {
print(i)
cluster <- subset(tcells.unannotated, seurat_clusters == i)
Idents(cluster) <- cluster$isoform
markers <- FindMarkers(object = cluster,
ident.1 = "E4",
ident.2 = "E3",
only.pos = FALSE, # default
min.pct = 0.10, # default
test.use = "MAST",
verbose = TRUE,
assay = "RNA")
if(nrow(markers) == 0) {
next
}
markers$cluster <- i
markers$gene <- rownames(markers)
isoform.df <- rbind(isoform.df, markers)
}
# reformat table
colnames(isoform.df)[c(3,4)] <- c("percent_E4","percent_E3")
rownames(isoform.df) <- 1:nrow(isoform.df)
isoform.df$percent_difference <- abs(isoform.df$percent_E4 - isoform.df$percent_E3)
isoform.df <- isoform.df[,c(6,7,1,5,2,3,4,8)]
# write table
write.table(isoform.df, "../../results/reclusterTcell/DEGs/E4_vs_E3_DEGs.tsv",
sep = "\t", quote = FALSE, row.names = FALSE)
Female vs male within each cluster
cell_types <- unique(tcells.unannotated$seurat_clusters)
sex.df <- data.frame()
for (i in cell_types) {
print(i)
cluster <- subset(tcells.unannotated, seurat_clusters == i)
Idents(cluster) <- cluster$sex
markers <- FindMarkers(object = cluster,
ident.1 = "Female",
ident.2 = "Male",
only.pos = FALSE, # default
min.pct = 0.10, # default
test.use = "MAST",
verbose = TRUE,
assay = "RNA")
if(nrow(markers) == 0) {
next
}
markers$cluster <- i
markers$gene <- rownames(markers)
sex.df <- rbind(sex.df, markers)
}
# reformat table
colnames(sex.df)[c(3,4)] <- c("percent_female","percent_male")
rownames(sex.df) <- 1:nrow(sex.df)
sex.df$percent_difference <- abs(sex.df$percent_female - sex.df$percent_male)
sex.df <- sex.df[,c(6,7,1,5,2,3,4,8)]
# write table
write.table(sex.df,
"../../results/reclusterTcell/DEGs/female_vs_male_DEGs.tsv", sep = "\t",
quote = FALSE, row.names = FALSE)
14 vs 2 months within each cluster
# initialize variables
cell_types <- unique(tcells.unannotated$seurat_clusters)
age.df <- data.frame()
# loop through clusters
for (i in cell_types) {
print(i)
cluster <- subset(tcells.unannotated, seurat_clusters == i)
Idents(cluster) <- cluster$age
markers <- FindMarkers(object = cluster,
ident.1 = "14 months",
ident.2 = "2 months",
only.pos = FALSE, # default
min.pct = 0.10, # default
test.use = "MAST",
verbose = TRUE,
assay = "RNA")
if(nrow(markers) == 0) {
next
}
markers$cluster <- i
markers$gene <- rownames(markers)
age.df <- rbind(age.df, markers)
}
# reformat table
colnames(age.df)[c(3,4)] <- c("percent_14mo","percent_2mo")
rownames(age.df) <- 1:nrow(age.df)
age.df$percent_difference <- abs(age.df$percent_14mo - age.df$percent_2mo)
age.df <- age.df[,c(6,7,1,5,2,3,4,8)]
# write table
write.table(age.df,
"../../results/reclusterTcell/DEGs/14_vs_2_months_DEGs.tsv", sep = "\t",
quote = FALSE, row.names = FALSE)
DEG tables and plots
Compare DEGs
# read tables
isoform.df <- read.table(
"../../results/reclusterTcell/DEGs/E4_vs_E3_DEGs.tsv",
sep = "\t", header = TRUE)
age.df <- read.table(
"../../results/reclusterTcell/DEGs/14_vs_2_months_DEGs.tsv",
sep = "\t", header = TRUE)
sex.df <- read.table(
"../../results/reclusterTcell/DEGs/female_vs_male_DEGs.tsv",
sep = "\t", header = TRUE)
# filter
isoform.df <- isoform.df[isoform.df$p_val_adj < 0.05,]
age.df <- age.df[age.df$p_val_adj < 0.05,]
sex.df <- sex.df[sex.df$p_val_adj < 0.05,]
# add columns
direction <- isoform.df$avg_log2FC > 0
direction <- gsub(TRUE, "isoform_up", direction)
direction <- gsub(FALSE, "isoform_down", direction)
isoform.df$direction <- direction
direction <- age.df$avg_log2FC > 0
direction <- gsub(TRUE, "age_up", direction)
direction <- gsub(FALSE, "age_down", direction)
age.df$direction <- direction
direction <- sex.df$avg_log2FC > 0
direction <- gsub(TRUE, "sex_up", direction)
direction <- gsub(FALSE, "sex_down", direction)
sex.df$direction <- direction
# reformat tables
isoform.df2 <- isoform.df %>%
dplyr::count(cluster,direction) %>%
tidyr::spread(cluster, n)
age.df2 <- age.df %>%
dplyr::count(cluster,direction) %>%
tidyr::spread(cluster, n)
sex.df2 <- sex.df %>%
dplyr::count(cluster,direction) %>%
tidyr::spread(cluster, n)
# master table
df <- smartbind(isoform.df2, sex.df2, age.df2)
df
Upset plot
# read tables
isoform.df <- read.table("../../results/reclusterTcell/DEGs/E4_vs_E3_DEGs.tsv",
sep = "\t", header = TRUE)
age.df <- read.table("../../results/reclusterTcell/DEGs/14_vs_2_months_DEGs.tsv",
sep = "\t", header = TRUE)
sex.df <- read.table("../../results/reclusterTcell/DEGs/female_vs_male_DEGs.tsv",
sep = "\t", header = TRUE)
# filter tables
isoform.df <- isoform.df[isoform.df$p_val_adj < 0.05,]
age.df <- age.df[age.df$p_val_adj < 0.05,]
sex.df <- sex.df[sex.df$p_val_adj < 0.05,]
clusters <- unique(tcells.unannotated$seurat_clusters)
for (i in clusters) {
# Subset df by cluster
isoform <- subset(isoform.df, isoform.df$cluster == i)
sex <- subset(sex.df, sex.df$cluster == i)
age <- subset(age.df, age.df$cluster == i)
# Subset lists
isoform_up <- subset(isoform$gene, isoform$avg_log2FC > 0)
isoform_down <- subset(isoform$gene, isoform$avg_log2FC < 0)
sex_up <- subset(sex$gene, sex$avg_log2FC > 0)
sex_down <- subset(sex$gene, sex$avg_log2FC < 0)
age_up <- subset(age$gene, age$avg_log2FC > 0)
age_down <- subset(age$gene, age$avg_log2FC < 0)
list_input <- list("Age Up-regulated" = age_up,
"Isoform Up-regulated" = isoform_up,
"Sex Up-regulated" = sex_up,
"Age Down-regulated" = age_down,
"Isoform Down-regulated" = isoform_down,
"Sex Down-regulated" = sex_down)
data <- fromList(list_input)
# store names
names <- c("Sex Down-regulated","Isoform Down-regulated","Age Down-regulated",
"Sex Up-regulated","Isoform Up-regulated","Age Up-regulated")
# plot
upset_gene <- ComplexUpset::upset(data,
names,
set_sizes=(
upset_set_size()
+ geom_text(aes(label=..count..), hjust=1.1, stat='count')
+ expand_limits(y=150)),
queries = list(upset_query("Age Up-regulated", fill = "red"),
upset_query("Isoform Up-regulated", fill = "red"),
upset_query("Sex Up-regulated", fill = "red"),
upset_query("Age Down-regulated", fill = "blue"),
upset_query("Isoform Down-regulated", fill = "blue"),
upset_query("Sex Down-regulated", fill = "blue")),
base_annotations = list('Intersection size' = (
intersection_size(bar_number_threshold=1, width=0.1)
+ scale_y_continuous(expand=expansion(mult=c(0, 0.05)),limits = c(0,150)) # space on top
+ theme(
# hide grid lines
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
# show axis lines
axis.line=element_line(colour='black')))),
stripes = upset_stripes(
geom=geom_segment(size=12), # make the stripes larger
colors=c('grey95', 'white')),
# to prevent connectors from getting the colorured
# use `fill` instead of `color`, together with `shape='circle filled'`
matrix = intersection_matrix(
geom=geom_point(
shape='circle filled',
size=3,
stroke=0.45)),
sort_sets=FALSE,
sort_intersections='descending'
)
upset_gene <- upset_gene + ggtitle(paste0(i,", adj_p_val < 0.05"))
i <- gsub(" ","_",i)
i <- gsub("/","_",i)
i <- gsub("-","_",i)
pdf(paste0("../../results/reclusterTcell/upset/upset_",tolower(i),".pdf"), height = 6, width = 8)
print(upset_gene)
dev.off()
}
Volcano
variables <- c("sex","age","isoform")
all_clusters <- unique(tcells.unannotated$seurat_clusters)
for (i in variables) {
# read DEG file
if (i == "sex") {
treatment_vs_control <-
read.delim("../../results/reclusterTcell/DEGs/female_vs_male_DEGs.tsv",
sep = "\t")
} else if (i == "age") {
treatment_vs_control <-
read.delim("../../results/reclusterTcell/DEGs/14_vs_2_months_DEGs.tsv",
sep = "\t")
} else {
treatment_vs_control <-
read.delim("../../results/reclusterTcell/DEGs/E4_vs_E3_DEGs.tsv",
sep = "\t")
}
# assign colors
color_values <- vector()
max <- nrow(treatment_vs_control)
for(row in 1:max){
if (treatment_vs_control$p_val_adj[row] < 0.05){
if (treatment_vs_control$avg_log2FC [row] > 0){
color_values <- c(color_values, 1) # 1 when logFC > 0 and FDRq < 0.05
}
else if (treatment_vs_control$avg_log2FC[row] < 0){
color_values <- c(color_values, 2) # 2 when logFC < 0 and FDRq < 0.05
}
}
else{
color_values <- c(color_values, 3) # 3 when FDRq >= 0.05
}
}
treatment_vs_control$color_adjpval_0.05 <- factor(color_values)
# loop through clusters
for (j in all_clusters) {
# subset cluster
data <- subset(treatment_vs_control, cluster == j)
# plot only if there are DEGs with p_val_adj < 0.05
num <- subset(data, p_val_adj < 0.05)
num <- nrow(num)
if(num != 0) {
# subset genes to label
up <- data[data$color_adjpval_0.05 == 1,]
up10 <- up[1:10,]
down <- data[data$color_adjpval_0.05 == 2,]
down10 <- down[1:10,]
# set manual colors
if (!1 %in% unique(data$color_adjpval_0.05)) {
my_colors <- c("blue","gray")
} else if (!2 %in% unique(data$color_adjpval_0.05)) {
my_colors <- c("red","gray")
} else if (!1 %in% unique(data$color_adjpval_0.05) && !2 %in% unique(data$color_adjpval_0.05)) {
my_colors <- c("gray")
} else {
my_colors <- c("red","blue","gray")
}
# set significance threshold
hadjpval <- (-log10(max(
data$p_val[data$p_val_adj < 0.05],
na.rm=TRUE)))
# plot
p <-
ggplot(data = data,
aes(x = avg_log2FC, # x-axis is logFC
y = -log10(p_val), # y-axis will be -log10 of P.Value
color = color_adjpval_0.05)) + # color is based on factored color column
geom_point(alpha = 0.8, size = 2) + # create scatterplot, alpha makes points transparent
theme_bw() + # set color theme
theme(legend.position = "none") + # no legend
scale_color_manual(values = my_colors) + # set factor colors
labs(
title = "", # no main title
x = expression(log[2](FC)), # x-axis title
y = expression(-log[10] ~ "(" ~ italic("p") ~ "-value)") # y-axis title
) +
theme(axis.title.x = element_text(size = 10),
axis.text.x = element_text(size = 10)) +
theme(axis.title.y = element_text(size = 10),
axis.text.y = element_text(size = 10)) +
geom_hline(yintercept = hadjpval, # horizontal line
colour = "#000000",
linetype = "dashed") +
ggtitle(paste0(j,"\n",i,", p_val_adj < 0.05")) +
geom_text_repel(data = up10,
aes(x = avg_log2FC, y= -log10(p_val), label = gene),
color = "maroon",
fontface="italic",
max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
) +
geom_text_repel(data = down10,
aes(x = avg_log2FC, y= -log10(p_val), label = gene),
color = "navyblue",
fontface="italic",
max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
)
p
# save
clus <- j
clus <- gsub(" ","_",clus)
clus <- gsub("/","_",clus)
clus <- gsub("-","_",clus)
if (i == "sex") {
path <- paste0("../../results/reclusterTcell/volcano/sex/",
tolower(clus),"_sex_volcano")
} else if (i == "age") {
path <- paste0("../../results/reclusterTcell/volcano/age/",
tolower(clus),"_age_volcano")
} else {
path <- paste0("../../results/reclusterTcell/volcano/isoform/",
tolower(clus),"_isoform_volcano")
}
pdf(paste(path,".pdf"), height = 8, width = 8)
print(p)
dev.off()
print(paste("i =",i,", j =",j))
}
} # end loop through clusters
} # end loop through variables